vignettes/example-gallery-05-scatter-plots.Rmd
example-gallery-05-scatter-plots.RmdThis document is adapted from the Bar Charts section of the Altair Example Gallery.
Our first step is to set up our environment:
# devtools::install_github("vegawidget/altair")
library("altair")
library("tibble")
vega_data <- import_vega_data()glimpse(vega_data$barley())
#> Observations: 120
#> Variables: 4
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "G...
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "M...
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, ...
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.9...chart <-
alt$Chart(r_to_py(vega_data$barley()))$
mark_point()$
encode(
x = alt$X("median(yield)", scale=alt$Scale(zero=FALSE)),
y = alt$Y(
"variety",
sort = alt$SortField(
field = "yield",
op = "median",
order = "descending"
),
scale = alt$Scale(rangeStep = 20)
),
color = "year:N",
row = "site"
)
chartmovies <- vega_data$movies()
glimpse(movies)
#> Observations: 3,201
#> Variables: 16
#> $ Creative_Type <list> [NULL, NULL, NULL, NULL, "Contemporary...
#> $ Director <list> [NULL, NULL, NULL, NULL, NULL, NULL, "...
#> $ Distributor <list> ["Gramercy", "Strand", "Lionsgate", "F...
#> $ IMDB_Rating <dbl> 6.1, 6.9, 6.8, NaN, 3.4, NaN, 7.7, 3.8,...
#> $ IMDB_Votes <dbl> 1071, 207, 865, NaN, 165, NaN, 15133, 3...
#> $ MPAA_Rating <list> ["R", "R", NULL, NULL, "R", NULL, "R",...
#> $ Major_Genre <list> [NULL, "Drama", "Comedy", "Comedy", "D...
#> $ Production_Budget <dbl> 8000000, 300000, 250000, 300000, 100000...
#> $ Release_Date <list> ["12-Jun-98", "7-Aug-98", "28-Aug-98",...
#> $ Rotten_Tomatoes_Rating <dbl> NaN, NaN, NaN, 13, 62, NaN, NaN, NaN, 2...
#> $ Running_Time_min <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,...
#> $ Source <list> [NULL, NULL, NULL, NULL, "Original Scr...
#> $ Title <list> ["The Land Girls", "First Love, Last R...
#> $ US_DVD_Sales <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,...
#> $ US_Gross <dbl> 146083, 10876, 203134, 373615, 1009819,...
#> $ Worldwide_Gross <dbl> 146083, 10876, 203134, 373615, 1087521,...glimpse(vega_data$cars())
#> Observations: 406
#> Variables: 9
#> $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
#> $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
#> $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
#> $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
#> $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
#> $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
#> $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
#> $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...glimpse(vega_data$driving())
#> Observations: 55
#> Variables: 4
#> $ gas <dbl> 2.38, 2.40, 2.26, 2.31, 2.27, 2.25, 2.22, 2.12, 2.11, 2....
#> $ miles <dbl> 3675, 3706, 3766, 3905, 3935, 3977, 4085, 4218, 4369, 45...
#> $ side <chr> "left", "right", "bottom", "top", "right", "bottom", "ri...
#> $ year <dbl> 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 19...lines <-
alt$Chart(r_to_py(vega_data$driving()))$
mark_line()$
encode(
x = alt$X("miles", scale = alt$Scale(zero = FALSE)),
y = alt$Y("gas", scale = alt$Scale(zero = FALSE)),
order="year"
)
points <-
alt$Chart(r_to_py(vega_data$driving()))$
mark_circle()$
encode(
alt$X("miles", scale = alt$Scale(zero = FALSE)),
alt$Y("gas", scale = alt$Scale(zero = FALSE))
)
chart <- (lines + points)
chartInspired by Edward Tufte’s Visual Display of Quantitative Information (p. 133), this is based on g3o2’s block. It is also interactive, you can make an interval selection on the scatterplot.
glimpse(vega_data$cars())
#> Observations: 406
#> Variables: 9
#> $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
#> $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
#> $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
#> $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
#> $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
#> $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
#> $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
#> $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...cars <- r_to_py(vega_data$cars())
brush <- alt$selection(type = "interval")
tick_axis <- alt$Axis(labels=FALSE, domain=FALSE, ticks=FALSE)
tick_axis_notitle <-
alt$Axis(labels=FALSE, domain=FALSE, ticks=FALSE, title="")
points <-
alt$Chart(cars)$
mark_point()$
encode(
x = alt$X("Miles_per_Gallon", axis = alt$Axis(title = "")),
y = alt$Y("Horsepower", axis = alt$Axis(title = "")),
color = alt$condition(brush, "Origin", alt$value("grey"))
)$
properties(selection = brush)
x_ticks <-
alt$Chart(cars)$
mark_tick()$
encode(
x = alt$X("Miles_per_Gallon", axis = tick_axis),
y = alt$Y("Origin", axis = tick_axis_notitle),
color = alt$condition(brush, "Origin", alt$value("lightgrey"))
)$
properties(selection = brush)
y_ticks <-
alt$Chart(cars)$
mark_tick()$
encode(
alt$X("Origin", axis = tick_axis_notitle),
alt$Y("Horsepower", axis = tick_axis),
color=alt$condition(brush, "Origin", alt$value("lightgrey"))
)$
properties(selection = brush)
chart <- (y_ticks | (points & x_ticks))
chartThis example is modified from an example in Lisa Charlotte Rost’s blog post ‘One Chart, Twelve Charting Libraries’.
glimpse(vega_data$gapminder_health_income())
#> Observations: 187
#> Variables: 4
#> $ country <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "An...
#> $ income <dbl> 1925, 10620, 13434, 46577, 7615, 21049, 17344, 7763...
#> $ health <dbl> 57.63, 76.00, 76.50, 84.10, 61.00, 75.20, 76.20, 74...
#> $ population <dbl> 32526562, 2896679, 39666519, 70473, 25021974, 91818...glimpse(vega_data$iris())
#> Observations: 150
#> Variables: 5
#> $ petalLength <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, ...
#> $ petalWidth <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, ...
#> $ sepalLength <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, ...
#> $ sepalWidth <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, ...
#> $ species <chr> "setosa", "setosa", "setosa", "setosa", "setosa", ...chart <-
alt$Chart(r_to_py(vega_data$disasters()))$
mark_circle(opacity = 0.8, stroke = "black", strokeWidth = 1)$
encode(
x = alt$X("Year:O", axis = alt$Axis(labelAngle = 0)),
y = alt$Y("Entity:N"),
size = alt$Size(
"Deaths:Q",
scale = alt$Scale(range = c(0, 5000)),
legend = alt$Legend(title = "Annual Global Deaths")
),
color = alt$Color("Entity:N", legend = NULL)
)$
properties(
width = 480,
height = 350
)$
transform_filter("datum.Entity != 'All natural disasters'")
chartglimpse(vega_data$cars())
#> Observations: 406
#> Variables: 9
#> $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
#> $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
#> $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
#> $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
#> $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
#> $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
#> $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
#> $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...chart <-
alt$Chart(r_to_py(vega_data$cars()))$
mark_circle()$
encode(
x = alt$X(alt$`repeat`("column"), type = "quantitative"),
y = alt$Y(alt$`repeat`("row"), type = "quantitative"),
color = "Origin:N"
)$
properties(width = 200, height = 200)$
`repeat`(
row = list("Horsepower", "Acceleration", "Miles_per_Gallon"),
column = list("Miles_per_Gallon", "Acceleration", "Horsepower")
)$
interactive()
chartLayering can now work with the + operator as it does in Python. Additionally, it can be declared as alt$layer(chart1, chart2) or as alt$ChartLayer(layer = list(chart1, chart2))
This example demonstrates the need for a ggplot2::facet_wrap()-like capability, which we understand is coming to Vega-Lite in the no-so-distant future.
glimpse(vega_data$movies())
#> Observations: 3,201
#> Variables: 16
#> $ Creative_Type <list> [NULL, NULL, NULL, NULL, "Contemporary...
#> $ Director <list> [NULL, NULL, NULL, NULL, NULL, NULL, "...
#> $ Distributor <list> ["Gramercy", "Strand", "Lionsgate", "F...
#> $ IMDB_Rating <dbl> 6.1, 6.9, 6.8, NaN, 3.4, NaN, 7.7, 3.8,...
#> $ IMDB_Votes <dbl> 1071, 207, 865, NaN, 165, NaN, 15133, 3...
#> $ MPAA_Rating <list> ["R", "R", NULL, NULL, "R", NULL, "R",...
#> $ Major_Genre <list> [NULL, "Drama", "Comedy", "Comedy", "D...
#> $ Production_Budget <dbl> 8000000, 300000, 250000, 300000, 100000...
#> $ Release_Date <list> ["12-Jun-98", "7-Aug-98", "28-Aug-98",...
#> $ Rotten_Tomatoes_Rating <dbl> NaN, NaN, NaN, 13, 62, NaN, NaN, NaN, 2...
#> $ Running_Time_min <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,...
#> $ Source <list> [NULL, NULL, NULL, NULL, "Original Scr...
#> $ Title <list> ["The Land Girls", "First Love, Last R...
#> $ US_DVD_Sales <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,...
#> $ US_Gross <dbl> 146083, 10876, 203134, 373615, 1009819,...
#> $ Worldwide_Gross <dbl> 146083, 10876, 203134, 373615, 1087521,...